
library(tidyverse)

## Restore the objects saved in eavs.RData into the workspace. The code below
## expects this to provide a data frame named `eavs` -- TODO confirm.
load("eavs.RData")

## Table A4: percent of counties with a missing `pct_early`, by state and
## year. Result has one row per state and one column per year.
eavs_missing = eavs %>%
  group_by(state, year) %>%
  # Produce exactly one value per state-year. The denominator comes from n()
  # rather than from a grouping column, because referencing a grouping column
  # inside summarise() yields one row per county (deprecated in dplyr 1.1.0).
  summarise(pct_missing = round(sum(is.na(pct_early)) / n() * 100,
                                digits = 2)) %>%
  # Drop all grouping so `state` alone identifies rows when pivoting; a county
  # count that differs between years can no longer split a state across rows.
  ungroup() %>%
  pivot_wider(names_from = year, values_from = pct_missing)


## Table A5: count, per state and county, how many rows lack `pct_early`,
## and flag the county as included (1) or not (0).
missing_compare = eavs %>%
  mutate(county_fips = as.numeric(county_fips),
         # 1 when pct_early is NA, otherwise 0
         missing = as.numeric(is.na(pct_early))) %>%
  group_by(state, county_fips) %>%
  summarise(tot_missing = sum(missing)) %>%
  ungroup() %>%
  # Counties with exactly 3 or 4 missing rows get included = 0
  mutate(included = if_else(tot_missing %in% c(3, 4), 0, 1))

## Census data: keep Geo_FIPS, rename three columns to pct_white, income and
## age18, and add pct_college as the sum of the four PCT_SE_A12001_00[5-8]
## columns.
census = read.csv("R12952419_SL050.csv") %>%
  mutate(pct_college = PCT_SE_A12001_005 + PCT_SE_A12001_006 +
           PCT_SE_A12001_007 + PCT_SE_A12001_008) %>%
  select(Geo_FIPS,
         pct_white = PCT_SE_A04001_003,
         income = SE_A14010_001,
         age18 = SE_A16002_001,
         pct_college)

## 2008 presidential results, DEMOCRAT rows only: candidate votes as a
## percentage of total votes (pct_dem), plus totalvotes and county_fips.
pres = read.csv("countypres_2000-2020.csv") %>%
  filter(year == 2008, party == "DEMOCRAT") %>%
  transmute(pct_dem = round(candidatevotes / totalvotes * 100, digits = 2),
            totalvotes,
            county_fips)

## Attach the census and 2008 election columns to each county, then compute
## turnout08 as totalvotes per 100 of age18.
missing_compare = left_join(missing_compare, census,
                            by = c("county_fips" = "Geo_FIPS"))
missing_compare = left_join(missing_compare, pres, by = "county_fips")
missing_compare = mutate(
  missing_compare,
  turnout08 = round(totalvotes / age18 * 100, digits = 2),
  # Anything above 100, or not comparable to 100 (NA), is set to NA
  turnout08 = if_else(turnout08 <= 100, turnout08, NA_real_)
)

## Averages over every row of missing_compare (NA values ignored)
with(missing_compare, mean(pct_college, na.rm = TRUE))
with(missing_compare, mean(pct_white, na.rm = TRUE))
with(missing_compare, mean(income, na.rm = TRUE))
with(missing_compare, mean(pct_dem, na.rm = TRUE))
with(missing_compare, mean(turnout08, na.rm = TRUE))

## Averages restricted to rows where included == 1 (NA values ignored)
with(missing_compare, mean(pct_college[included == 1], na.rm = TRUE))
with(missing_compare, mean(pct_white[included == 1], na.rm = TRUE))
with(missing_compare, mean(income[included == 1], na.rm = TRUE))
with(missing_compare, mean(pct_dem[included == 1], na.rm = TRUE))
with(missing_compare, mean(turnout08[included == 1], na.rm = TRUE))

## T-test
## Two-sample t-tests (t.test() defaults) comparing each covariate over all
## rows of missing_compare with the same covariate among rows where
## included == 1.
## NOTE(review): the second sample is a subset of the first, so the two
## samples overlap. A comparison of included == 1 against included == 0 may
## be what is intended -- confirm before changing, since it would alter the
## reported results.
t.test(missing_compare$pct_college, missing_compare$pct_college[missing_compare$included == 1])
t.test(missing_compare$pct_white, missing_compare$pct_white[missing_compare$included == 1])
t.test(missing_compare$income, missing_compare$income[missing_compare$included == 1])
t.test(missing_compare$pct_dem, missing_compare$pct_dem[missing_compare$included == 1])
t.test(missing_compare$turnout08, missing_compare$turnout08[missing_compare$included == 1])

